{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from livestats import livestats\n",
    "from math import sqrt\n",
    "import dautil as dl\n",
    "import numpy as np\n",
    "from scipy.stats import skew\n",
    "from scipy.stats import kurtosis\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "context = dl.nb.Context('calculating_moments')\n",
    "lr = dl.nb.LatexRenderer(chapter=12, context=context)\n",
    "lr.render(r'\\delta\\! = x - m')\n",
    "lr.render(r'm\\' = m + \\frac{\\delta}{n}')\n",
    "lr.render(r'M_2\\' = M_2 + \\delta^2 \\frac{ n-1}{n}')\n",
    "lr.render(r'M_3\\' = M_3 + \\delta^3 \\frac{ (n - 1) (n - 2)}{n^2} - \\frac{3\\delta M_2}{n}')\n",
    "lr.render(r'M_4\\' = M_4 + \\frac{\\delta^4 (n - 1) (n^2 - 3n + 3)}{n^3} + \\frac{6\\delta^2 M_2}{n^2} - \\frac{4\\delta M_3}{n}')\n",
    "lr.render(r'g_1 = \\frac{\\sqrt{n} M_3}{M_2^{3/2}}')\n",
    "lr.render(r'g_2 = \\frac{n M_4}{M_2^2}-3.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# From https://en.wikipedia.org/wiki/\n",
    "# Algorithms_for_calculating_variance\n",
    "def online_kurtosis(data):\n",
    "    n = 0\n",
    "    mean = 0\n",
    "    M2 = 0\n",
    "    M3 = 0\n",
    "    M4 = 0\n",
    "    stats = []\n",
    "\n",
    "    for x in data:\n",
    "        n1 = n\n",
    "        n = n + 1\n",
    "        delta = x - mean\n",
    "        delta_n = delta / n\n",
    "        delta_n2 = delta_n ** 2\n",
    "        term1 = delta * delta_n * n1\n",
    "        mean = mean + delta_n\n",
    "        M4 = M4 + term1 * delta_n2 * (n**2 - 3*n + 3) + \\\n",
    "            6 * delta_n2 * M2 - 4 * delta_n * M3\n",
    "        M3 = M3 + term1 * delta_n * (n - 2) - 3 * delta_n * M2\n",
    "        M2 = M2 + term1\n",
    "        s = sqrt(n) * M3 / sqrt(M2 ** 3)\n",
    "        k = (n*M4) / (M2**2) - 3\n",
    "        stats.append((mean, sqrt(M2/(n - 1)), s, k))\n",
    "\n",
    "    return np.array(stats)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "test = livestats.LiveStats([0.25, 0.5, 0.75])\n",
    "\n",
    "data = dl.data.Weather.load()['TEMP'].\\\n",
    "    resample('M').dropna().values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "ls = []\n",
    "truth = []\n",
    "\n",
    "test.add(data[0])\n",
    "\n",
    "for i in range(1, len(data)):\n",
    "    test.add(data[i])\n",
    "    q1, q2, q3 = test.quantiles()\n",
    "\n",
    "    ls.append((test.mean(), sqrt(test.variance()),\n",
    "              test.skewness(), test.kurtosis(), q1[1], q2[1], q3[1]))\n",
    "    slice = data[:i]\n",
    "    truth.append((slice.mean(), slice.std(),\n",
    "                  skew(slice), kurtosis(slice),\n",
    "                  np.percentile(slice, 25), np.median(slice),\n",
    "                  np.percentile(slice, 75)))\n",
    "\n",
    "ls = np.array(ls)\n",
    "truth = np.array(truth)\n",
    "ok = online_kurtosis(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "dl.nb.RcWidget(context)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "dl.options.mimic_seaborn()\n",
    "cp = dl.plotting.CyclePlotter(plt.gca())\n",
    "cp.plot(ls.T[0], label='LiveStats')\n",
    "cp.plot(truth.T[0], label='Truth')\n",
    "cp.plot(data)\n",
    "plt.title('Live Stats Means')\n",
    "plt.xlabel('# points')\n",
    "plt.ylabel('Mean')\n",
    "plt.legend(loc='best')\n",
    "\n",
    "plt.figure()\n",
    "\n",
    "mses = [dl.stats.mse(truth.T[i], ls.T[i])\n",
    "        for i in range(7)]\n",
    "mses.extend([dl.stats.mse(truth.T[i], ok[1:].T[i])\n",
    "             for i in range(4)])\n",
    "dl.plotting.bar(plt.gca(),\n",
    "                ['mean', 'std', 'skew', 'kurt',\n",
    "                 'q1', 'q2', 'q3',\n",
    "                 'my_mean', 'my_std', 'my_skew', 'my_kurt'], mses)\n",
    "plt.title('MSEs for Various Statistics')\n",
    "plt.ylabel('MSE')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
